home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
LiquidLibrary 2005 September
/
LiquidLibrary 2005 Sep - Disc 1.iso
/
pc
/
Portfolio Browser
/
Filters
/
PDF
/
LIB
/
acc_ps2t.ps
next >
Wrap
Text File
|
2003-01-03
|
21KB
|
577 lines
% Copyright (c) 2001, AccuSoft Corporation.
% All rights reserved.
%
% This is a PostScript library that makes GhostScript write the
% text it renders to the stdout stream. The library is used
% by the PDF extension of the ImageGear DLL to extract text
% from PostScript files.
%
% Created: 09/17/2001 AlexR
% Last modified: 02/12/2002
% Revision: 2
%
% The library redefines the show, ashow, widthshow, awidthshow,
% kshow, xshow, yshow and xyshow text rendering operators as
% procedures that extract the text translated to ISOLatin1 encoding.
% The output has the following format:
%
% #S <x0> <y0> <len1> <len2> <string> <x1> <y1>
% This line indicates displaying text
%
% #P
% This line indicates the end of the page
% <x0>, <y0>, <x1>, <y1> are integer coordinates in 1/100 of a device unit.
% (<x0>,<y0>) is the start point of the painted text and (<x1>,<y1>) is its end point.
% These two points specify the direction of the text and its location.
% <string> is the extracted text; it consists of printable characters in ISOLatin1
% encoding. <len2> is the length of <string>. <len1> is the number of glyphs represented
% by <string>. <len2> may differ from <len1> (for example, the character with
% glyph name /trademark is represented by "^TM").
userdict begin
% name proc IG_redef -
% Conditional redefinition helper for the standard "show"-family operators.
% When <name> can be found on the dictionary stack, <proc> is stored under
% <name> in the current dictionary; when it cannot be found, both operands
% are discarded and nothing is defined.
/IG_redef
{
    exch dup where          % stack: proc name dict true | proc name false
    {
        pop                 % the dictionary holding the old value is not needed
        exch def            % stack: -
    }
    {
        pop pop             % unknown name: drop both operands
    }
    ifelse
} bind def
% Private working dictionary of the library. It is created with an initial
% capacity of 50 entries and stored in the current dictionary under the
% name IG_PStoTxtDict.
/IG_PStoTxtDict 50 dict def
% Declare all names in our private dictionary
% (the //name form substitutes the dictionary object while this line is
% scanned, so IG_PStoTxtDict has to be defined by the line above)
//IG_PStoTxtDict begin
% OT1 encoding. It is used for self-named glyphs of Type 3 fonts.
% The array holds 128 replacement strings and is indexed by character
% code (0 - 127) while the ISOLatin1CharStrings dictionary is built.
% The comment in front of each row gives the index range of that row.
/_DvipsOT1
[
% 0 - 7
(\\Gamma) (\\Delta) (\\Theta) (\\Lambda) (\\Xi) (\\Pi) (\\Sigma) (\\Upsilon)
% 8 - 18
(\\Phi) (\\Psi) (\\Omega) (ff) (fi) (fl) (ffi) (ffl) (i) (j) (`)
% 19 - 30
(') (\237) (\226) (\257) (\232) (\270) (\337) (ae) (oe) (\370) (AE) (OE)
% 31 - 47
(\330) (/) (!) ('') (#) ($) (%) (&) (') (\() (\)) (*) (+) (,) (\255) (.) (/)
% 48 - 64
(0) (1) (2) (3) (4) (5) (6) (7) (8) (9) (:) (;) (!) (=) (?) (?) (@)
% 65 - 83
(A) (B) (C) (D) (E) (F) (G) (H) (I) (J) (K) (L) (M) (N) (O) (P) (Q) (R) (S)
% 84 - 90
(T) (U) (V) (W) (X) (Y) (Z)
% 91 - 96
([) (``) (]) (^) (_) (`)
% 97 - 115
(a) (b) (c) (d) (e) (f) (g) (h) (i) (j) (k) (l) (m) (n) (o) (p) (q) (r) (s)
% 116 - 122
(t) (u) (v) (w) (x) (y) (z)
% 123 - 127
(--) (---) (\235) (~) (\250)
] def
% Glyph names used in most True Type encoding vectors (for Type 42 fonts)
% The array holds 256 names of the form /Gxx, where xx is the array index
% (the character code) written with two upper-case hexadecimal digits.
/_TTGlyphNames1
[
% 0 - 31 (gap: these entries are not used when the encoding is generated)
/G00 /G01 /G02 /G03 /G04 /G05 /G06 /G07 /G08 /G09 /G0A /G0B /G0C /G0D /G0E /G0F
/G10 /G11 /G12 /G13 /G14 /G15 /G16 /G17 /G18 /G19 /G1A /G1B /G1C /G1D /G1E /G1F
% 32 - 126
/G20 /G21 /G22 /G23 /G24 /G25 /G26 /G27 /G28 /G29 /G2A /G2B /G2C /G2D /G2E /G2F
/G30 /G31 /G32 /G33 /G34 /G35 /G36 /G37 /G38 /G39 /G3A /G3B /G3C /G3D /G3E /G3F
/G40 /G41 /G42 /G43 /G44 /G45 /G46 /G47 /G48 /G49 /G4A /G4B /G4C /G4D /G4E /G4F
/G50 /G51 /G52 /G53 /G54 /G55 /G56 /G57 /G58 /G59 /G5A /G5B /G5C /G5D /G5E /G5F
/G60 /G61 /G62 /G63 /G64 /G65 /G66 /G67 /G68 /G69 /G6A /G6B /G6C /G6D /G6E /G6F
/G70 /G71 /G72 /G73 /G74 /G75 /G76 /G77 /G78 /G79 /G7A /G7B /G7C /G7D /G7E
% 127
/G7F
% 128 - 143
/G80 /G81 /G82 /G83 /G84 /G85 /G86 /G87 /G88 /G89 /G8A /G8B /G8C /G8D /G8E /G8F
% 144 - 255
/G90 /G91 /G92 /G93 /G94 /G95 /G96 /G97 /G98 /G99 /G9A /G9B /G9C /G9D /G9E /G9F
/GA0 /GA1 /GA2 /GA3 /GA4 /GA5 /GA6 /GA7 /GA8 /GA9 /GAA /GAB /GAC /GAD /GAE /GAF
/GB0 /GB1 /GB2 /GB3 /GB4 /GB5 /GB6 /GB7 /GB8 /GB9 /GBA /GBB /GBC /GBD /GBE /GBF
/GC0 /GC1 /GC2 /GC3 /GC4 /GC5 /GC6 /GC7 /GC8 /GC9 /GCA /GCB /GCC /GCD /GCE /GCF
/GD0 /GD1 /GD2 /GD3 /GD4 /GD5 /GD6 /GD7 /GD8 /GD9 /GDA /GDB /GDC /GDD /GDE /GDF
/GE0 /GE1 /GE2 /GE3 /GE4 /GE5 /GE6 /GE7 /GE8 /GE9 /GEA /GEB /GEC /GED /GEE /GEF
/GF0 /GF1 /GF2 /GF3 /GF4 /GF5 /GF6 /GF7 /GF8 /GF9 /GFA /GFB /GFC /GFD /GFE /GFF
] def
% Alternative True Type glyph names: the same 256 slots as _TTGlyphNames1,
% but the hexadecimal digits a - f are written in lower case. Entries that
% contain no letter digit are therefore identical in both arrays.
% NOTE(review): entries 0 - 9 are single-digit (/G0 .. /G9) and entry 127 is
% the upper-case /G7F, unlike the rest of this table. _tt_pair_get skips an
% entry that equals the _TTGlyphNames1 entry, so no /G7f key is ever
% generated - confirm that this is intended.
/_TTGlyphNames2
[
% 0 - 31 (gap: these entries are not used when the encoding is generated)
/G0 /G1 /G2 /G3 /G4 /G5 /G6 /G7 /G8 /G9 /G0a /G0b /G0c /G0d /G0e /G0f
/G10 /G11 /G12 /G13 /G14 /G15 /G16 /G17 /G18 /G19 /G1a /G1b /G1c /G1d /G1e /G1f
% 32 - 126
/G20 /G21 /G22 /G23 /G24 /G25 /G26 /G27 /G28 /G29 /G2a /G2b /G2c /G2d /G2e /G2f
/G30 /G31 /G32 /G33 /G34 /G35 /G36 /G37 /G38 /G39 /G3a /G3b /G3c /G3d /G3e /G3f
/G40 /G41 /G42 /G43 /G44 /G45 /G46 /G47 /G48 /G49 /G4a /G4b /G4c /G4d /G4e /G4f
/G50 /G51 /G52 /G53 /G54 /G55 /G56 /G57 /G58 /G59 /G5a /G5b /G5c /G5d /G5e /G5f
/G60 /G61 /G62 /G63 /G64 /G65 /G66 /G67 /G68 /G69 /G6a /G6b /G6c /G6d /G6e /G6f
/G70 /G71 /G72 /G73 /G74 /G75 /G76 /G77 /G78 /G79 /G7a /G7b /G7c /G7d /G7e
% 127
/G7F
% 128 - 143
/G80 /G81 /G82 /G83 /G84 /G85 /G86 /G87 /G88 /G89 /G8a /G8b /G8c /G8d /G8e /G8f
% 144 - 255
/G90 /G91 /G92 /G93 /G94 /G95 /G96 /G97 /G98 /G99 /G9a /G9b /G9c /G9d /G9e /G9f
/Ga0 /Ga1 /Ga2 /Ga3 /Ga4 /Ga5 /Ga6 /Ga7 /Ga8 /Ga9 /Gaa /Gab /Gac /Gad /Gae /Gaf
/Gb0 /Gb1 /Gb2 /Gb3 /Gb4 /Gb5 /Gb6 /Gb7 /Gb8 /Gb9 /Gba /Gbb /Gbc /Gbd /Gbe /Gbf
/Gc0 /Gc1 /Gc2 /Gc3 /Gc4 /Gc5 /Gc6 /Gc7 /Gc8 /Gc9 /Gca /Gcb /Gcc /Gcd /Gce /Gcf
/Gd0 /Gd1 /Gd2 /Gd3 /Gd4 /Gd5 /Gd6 /Gd7 /Gd8 /Gd9 /Gda /Gdb /Gdc /Gdd /Gde /Gdf
/Ge0 /Ge1 /Ge2 /Ge3 /Ge4 /Ge5 /Ge6 /Ge7 /Ge8 /Ge9 /Gea /Geb /Gec /Ged /Gee /Gef
/Gf0 /Gf1 /Gf2 /Gf3 /Gf4 /Gf5 /Gf6 /Gf7 /Gf8 /Gf9 /Gfa /Gfb /Gfc /Gfd /Gfe /Gff
] def
% int _char_to_string str1
% Builds a freshly allocated one-character string whose only element
% is the character code <int>.
/_char_to_string
{
    1 string            % stack: int str
    dup 3 -1 roll       % stack: str str int
    0 exch put          % stack: str
} def
% int bool _tt_pair_get name str
% int bool _tt_pair_get -          (see below)
% Pairs a True Type glyph name with its one-character string.
% Used for filling the ISOLatin1CharStrings dictionary.
%   int  - character code, used as index into the glyph name arrays
%   bool - true:  the name is taken from _TTGlyphNames1
%          false: the name is taken from _TTGlyphNames2; when that name is
%                 the same as the _TTGlyphNames1 entry nothing is pushed,
%                 because the pair was already produced for the first array
/_tt_pair_get
{
    exch dup //_TTGlyphNames1 exch get      % stack: bool int name1
    3 -1 roll                               % stack: int name1 bool
    {
        exch //_char_to_string exec         % stack: name1 str
    }
    {
        1 index //_TTGlyphNames2 exch get   % stack: int name1 name2
        dup 3 -1 roll ne                    % stack: int name2 bool
        {
            exch //_char_to_string exec     % stack: name2 str
        }
        {
            pop pop                         % duplicate name: emit nothing
        }
        ifelse
    }
    ifelse
} def
% bool _tt_glyph_encode name1 str1 ... namen strn
% Pushes the True Type glyph-name / string pairs onto the operand stack.
% Pairs are generated for the character codes 32 - 127 and 160 - 255;
% the codes 0 - 31 and 128 - 159 are skipped.
%   bool - true:  the names are taken from _TTGlyphNames1
%          false: the names are taken from _TTGlyphNames2
% The flag is kept under the key tt_glyph1 in the current dictionary.
/_tt_glyph_encode
{
    /tt_glyph1 exch def
    % codes 32 - 127
    32 1 127 { /tt_glyph1 load //_tt_pair_get exec } for
    % codes 160 - 255
    160 1 255 { /tt_glyph1 load //_tt_pair_get exec } for
} def
% Character encoding dictionary.
% The keys are the most popular glyph names, the values are the strings in
% ISO Latin1 encoding that are written to the output for these names.
% Accented letters that have no ISO Latin1 code are written as the base
% letter followed by the accent character (acute \264, breve \226,
% caron \237, dotaccent \227, hungarumlaut \235, macron \257, ogonek \236,
% ring \232, dieresis \250).
% The misspelled keys /brockenbar, /dolar, /floring, /Acircumpflex, /devide
% and /threequarter are kept as aliases of the correctly spelled glyph
% names so that lookups relying on them keep working.
% When a key occurs more than once the last value wins.
/ISOLatin1CharStrings
<<
/.notdef ()
% 0 - 9
/zero (0) /one (1) /two (2) /three (3) /four (4) /five (5)
/six (6) /seven (7) /eight (8) /nine (9)
% punctuation and symbols
/ampersand (&) /at (@) /cent (c) /hyphen (-)
/space ( ) /comma (,) /asciicircum (^) /asciitilde (~)
/asterisk (*) /colon (:) /period (.) /periodcentered (\267)
/minus (-) /endash (--) /emdash (---) /exclam (!)
/fraction (/) /backslash (\\) /bar (|) /braceleft ({)
/braceright (}) /bracketleft ([) /bracketright (]) /brockenbar (|)
/brokenbar (|)
/dolar ($) /dollar ($) /equal (=) /greater (>) /greaterequal (>=)
/less (<) /lessequal (<=) /numbersign (#) /parenleft (\()
/parenright (\)) /percent (%) /perthousand (o/oo) /plus (+)
/question (?) /quotedbl (") /quotedblbase (,,) /quotedblleft (``)
/quotedblright ('') /quoteleft (`) /quoteright (')
/quotesinglbase (') /quotesingle (') /slash (/) /semicolon (;)
/trademark (^TM) /underscore (_) /ff (ff) /fi (fi)
/ffi (ffi) /fl (fl) /ffl (ffl) /floring (f) /florin (f)
/second ('') /minute (') /daggerdbl (#)
% ISO Latin1 special
/acute (\264) /breve (\226) /caron (\237) /circumflex (\223)
/dieresis (\250) /dotaccent (\227) /dotlessi (\220) /grave (\221)
/hungarumlaut (\235) /macron (\257) /ogonek (\236) /ring (\232)
/tilde (\224)
% Other standard latin characters
/ae (ae) /AE (AE) /Aacute (\301) /Abreve (A\226)
/Acircumpflex (\302) /Acircumflex (\302)
/Adieresis (\304) /Agrave (\300) /Amacron (A\257)
/Aogonek (A\236) /Aring (\305) /Atilde (\303) /Cacute (C\264)
/Ccaron (C\237) /Ccedilla (\307) /Dcaron (D\237) /Dcroat (\320)
/Eacute (\311) /Ecaron (E\237) /Ecircumflex (\312) /Edieresis (\313)
/Edotaccent (E\227) /Egrave (\310) /Emacron (E\257) /Eogonek (E\236)
/Eth (\320) /Gbreve (G\226) /Gcommaaccent (G,) /Iacute (\315)
/Icircumflex (\316) /Idieresis (\317) /Idotaccent (I\227) /Igrave (\314)
/Imacron (I\257) /Iogonek (I\236) /Kcommaaccent (K,) /Lacute (L\264)
% Nacute uses the acute accent \264 like every other *acute entry
/Lcaron (L\237) /Lcommaaccent (L,) /Lslash (L/) /Nacute (N\264)
/Ncaron (N\237) /Ncommaaccent (N,) /Ntilde (\321) /OE (OE)
/Oacute (\323) /Ocircumflex (\324) /Odieresis (\326) /Ograve (\322)
/Ohungarumlaut (O\235) /Omacron (O\257) /Oslash (\330) /Otilde (\325)
/Racute (R\264) /Rcaron (R\237) /Rcommaaccent (R,) /Sacute (S\264)
/Scaron (S\237) /Scedilla (S,) /Scommaaccent (S,) /Tcaron (T\237)
/Tcommaaccent (T,) /Thorn (\336) /Uacute (\332) /Ucircumflex (\333)
/Udieresis (\334) /Ugrave (\331) /Uhungarumlaut (U\235) /Umacron (U\257)
% Ydieresis is Y + dieresis (the same string as /G9F below)
/Uogonek (U\236) /Uring (U\232) /Yacute (\335) /Ydieresis (Y\250)
/Zacute (Z\264) /Zcaron (Z\237) /Zdotaccent (Z\227) /aacute (\341)
/abreve (a\226) /acircumflex (\342) /adieresis (\344) /agrave (\340)
/amacron (a\257) /aogonek (a\236) /aring (\345) /atilde (\343)
/bullet (.) /cacute (c\264) /ccaron (c\237) /ccedilla (\347)
/commaaccent (,) /copyright (\251) /currency (\244) /dcaron (d\237)
% the division sign is \367 in ISO Latin1 (\360 is eth)
/dcroat (d) /degree (\260) /devide (\367) /divide (\367) /eacute (\351)
/ecaron (e\237) /ecircumflex (\352) /edieresis (\353) /edotaccent (e\227)
% macron is \257 (\267 is periodcentered)
/egrave (\350) /ellipsis (...) /emacron (e\257) /eogonek (e\236)
/eth (\360) /exclamdown (!) /dagger (+) /gbreve (g\226)
/gcommaaccent (g,) /germandbls (\337) /guillemotleft (\253) /guillemotright (\273)
/guilsinglleft (<) /guilsinglright (>) /iacute (\355) /icircumflex (\356)
/idieresis (\357) /igrave (\354) /imacron (i\257) /iogonek (i\236)
/kcommaaccent (k,) /lacute (l\264) /lcaron (l\237) /lcommaaccent (l,)
/logicalnot (\254) /lslash (l/) /mu (\265) /multiply (\327)
/nacute (n\264) /ncaron (n\237) /ncommaaccent (n,) /notequal (=)
/ntilde (\361) /oacute (\363) /ocircumflex (\364) /odieresis (\366)
% hungarumlaut is \235 (\237 is caron), macron is \257
/oe (oe) /ograve (\362) /ohungarumlaut (o\235) /omacron (o\257)
/onehalf (1/2) /onequarter (1/4) /onesuperior (\271) /ordfeminine (\252)
/ordmasculine (\272) /oslash (\370) /otilde (\365) /paragraph (\266)
% racute is r + acute, in line with /Racute
/partialdiff (d) /plusminus (\261) /questiondown (?) /racute (r\264)
/rcaron (r\237) /rcommaaccent (r,) /registered (\256) /sacute (s\264)
/scaron (s\237) /scedilla (s,) /scommaaccent (s,) /section (\247)
/sterling (\243) /tcaron (t\237) /tcommaaccent (t,) /thorn (\376)
/threequarter (3/4) /threequarters (3/4)
/threesuperior (\263) /twosuperior (\262) /uacute (\372)
/ucircumflex (\373) /udieresis (\374) /ugrave (\371) /uhungarumlaut (u\235)
/umacron (u\257) /uogonek (u\236) /uring (u\232) /yacute (\375)
/ydieresis (\377) /yen (\245) /zacute (z\264) /zcaron (z\237)
/zdotaccent (z\227)
% Some characters from Symbol Encoding
/Alpha (A) /Beta (B) /Chi (X) /Delta (\\Delta)
/Epsilon (E) /Eta (H) /Euro (C) /Gamma (\\Gamma)
/Ifraktur (J) /Iota (I) /Kappa (K) /Lambda (\\Lambda)
/Mu (M) /Nu (N) /Omega (O) /Omicron (O)
/Phi (\\Phi) /Pi (\\Pi) /Psi (\\Psi) /Rfraktur (R)
/Rho (P) /Sigma (\\Sigma) /Tau (T) /Theta (\\Theta)
/Upsilon (Y) /Upsilon1 (Y) /Xi (\\Xi) /Zeta (Z)
/aleph (x) /alpha (a) /angleleft (<) /angleright (>)
/approxequal (=) /asteriskmath (*) /chi (X) /copyrightsans (\(C\))
/copyrightserif (\(C\)) /delta (\\delta) /dotmath (.) /element (c)
/equivalence (=) /eta (n) /gamma (\\gamma) /iota (i)
% the backslash has to be doubled to survive in the string
/kappa (k) /lambda (\\lambda) /logicaland (^) /logicalor (V)
/omicron (o) /phi (\\phi) /phi1 (\\phi) /pi (\\pi)
/psi (\\psi) /registersans (\(R\)) /registerserif (\(R\))
/rho (p) /sigma (o) /sigma1 (c) /tau (t)
/theta (\\theta) /theta1 (\\theta) /trademarksans (^TM) /trademarkserif (^TM)
% self-named glyphs OT1 encoding including letters A - Z, a - z
% (the key is the one-character name built from the character code)
0 1 127
{
dup //_char_to_string exec cvn _DvipsOT1 3 -1 roll get
} for
% number-named glyph names
% (the key is the decimal representation of the character code)
0 1 127
{
dup 10 3 string cvrs cvn _DvipsOT1 3 -1 roll get
} for
% True Type encoding
true //_tt_glyph_encode exec
false //_tt_glyph_encode exec
% True Type from 130 to 159
/G82 (') /G83 (f) /G84 ('') /G85 (...) /G86 (+) /G87 (#) /G88 (\223)
/G89 (o/oo) /G8A (S\237) /G8B (<) /G8C (OE) /G91 (`) /G92 (') /G93 (``)
/G94 ('') /G95 (.) /G96 (--) /G97 (---) /G98 (~) /G99 (^TM) /G9A (s\237)
/G9B (>) /G9C (oe) /G9F (Y\250)
/G8a (S\237) /G8b (<) /G8c (OE) /G9a (s\237) /G9b (>) /G9c (oe) /G9f (Y\250)
>> def
% int _int_output -
% Writes the text representation of <int> to stdout.
/_int_output
{
    20 string       % scratch buffer for the conversion
    cvs
    print
} def
% x y _to_output_coord x' y'
% Maps the point (x, y) through the current transformation matrix and
% converts each resulting coordinate to an integer number of 1/100 units
% (multiply by 100, round, convert to integer).
/_to_output_coord
{
    transform                               % stack: xd yd
    % scale the top value and bring the other one up; two passes
    % handle yd first, then xd, and restore the x-before-y order
    2 { 100 mul round cvi exch } repeat     % stack: x' y'
} def
% - _print_current_location -
% Writes the current point to stdout as two integers in output
% coordinates, x first, each followed by a single space.
/_print_current_location
{
    currentpoint //_to_output_coord exec    % stack: x' y'
    exch                                    % stack: y' x'
    % each pass prints the value on top of the stack: x' first, then y'
    2 { //_int_output exec ( ) print } repeat
} def
% str _output_string_get str2 str2Length
% Translates every character of <str> to its ISO Latin1 representation
% and returns the concatenated result together with its length.
% For each character code the glyph name is read from the Encoding of the
% current font and looked up in ISOLatin1CharStrings. A name that is not
% in the dictionary is replaced by the character itself when the code is
% in the range 32 - 126, and by "#" otherwise.
% The intermediate result is kept in outString / outWidth of IG_PStoTxtDict.
/_output_string_get
{
    //userdict /IG_PStoTxtDict get begin
    /outString () def
    /outWidth 0 def
    {                                               % stack: ch
        % glyph name of this character code
        currentfont /Encoding get 1 index get       % stack: ch name
        ISOLatin1CharStrings 1 index known          % stack: ch name bool
        {
            ISOLatin1CharStrings exch get           % stack: ch str1
            exch pop                                % stack: str1
        }
        {
            % the name is not defined in ISOLatin1CharStrings
            pop                                     % stack: ch
            dup 32 lt 1 index 126 gt or             % stack: ch bool
            {
                % nothing can be said about this character
                pop (#)                             % stack: str1
            }
            {
                % Take the code as an ASCII character. This is wrong
                % for some fonts but usually gives the right result.
                1 string dup 0 4 -1 roll put        % stack: str1
            }
            ifelse
        }
        ifelse
        % append str1 to the result: allocate a longer string, copy the
        % old contents to its start and str1 behind them
        outWidth 1 index length add string          % stack: str1 new
        dup 0 outString putinterval                 % stack: str1 new
        dup outWidth 4 -1 roll putinterval          % stack: new
        /outString exch def
        /outWidth outString length def
    } forall
    outString outWidth
    end % IG_PStoTxtDict
} def
% ... string proc IG_output_make -
% Writes one #S line to stdout for <string> and paints the string.
%   proc - the operator (or procedure) that shows the string; it is
%          executed with "... string" on the operand stack
% An empty string produces no output line; <proc> is still executed so
% that the current point is updated.
/IG_output_make
{
    //userdict /IG_PStoTxtDict get begin
    1 index () eq                           % stack: ... string proc bool
    {
        % nothing to report, just draw
        exec
    }
    {
        (#S ) print
        % start location of the text
        _print_current_location
        % number of glyphs
        1 index length _int_output
        ( ) print
        % text in ISO Latin1 encoding and its length
        1 index _output_string_get          % stack: ... string proc str strLength
        _int_output                         % stack: ... string proc str
        ( ) print
        print                               % stack: ... string proc
        ( ) print
        % draw the text to move the current point
        exec                                % stack: -
        % end location of the text
        _print_current_location
        (\n) print
    }
    ifelse
    end % IG_PStoTxtDict
} def
end %IG_PStoTxtDict begin
% Redefine the end-of-page operators.
% - copypage -
% Nothing is painted; only the end-of-page marker "#P", surrounded by
% empty lines, is written to stdout.
/copypage
{
    % print consumes the topmost string first; the first and the last
    % string are equal, so the push order reads like the output order
    (\n) (#P\n) (\n)
    3 { print } repeat
} bind IG_redef
% - showpage -
% Runs the copypage replacement stored in userdict (which writes the
% end-of-page marker) and then resets the graphics state.
/showpage
{
    /copypage //userdict exch get exec
    initgraphics
} bind IG_redef
% Redefine "show" operators to extract text
% string show -
% Reports <string> through IG_output_make and paints it with the show
% operator taken from systemdict.
/show
{
    /show //systemdict exch get             % stack: string operator
    /IG_PStoTxtDict //userdict exch get     % stack: string operator dict
    /IG_output_make get exec
} bind IG_redef
% ax ay string ashow -
% Reports <string> through IG_output_make and paints it with the ashow
% operator taken from systemdict.
/ashow
{
    /ashow //systemdict exch get            % stack: ax ay string operator
    /IG_PStoTxtDict //userdict exch get     % stack: ax ay string operator dict
    /IG_output_make get exec
} bind IG_redef
% cx cy char string widthshow -
% Reports <string> through IG_output_make and paints it with the
% widthshow operator taken from systemdict.
/widthshow
{
    /widthshow //systemdict exch get        % stack: cx cy char string operator
    /IG_PStoTxtDict //userdict exch get     % stack: cx cy char string operator dict
    /IG_output_make get exec
} bind IG_redef
% cx cy char ax ay string awidthshow -
% Reports <string> through IG_output_make and paints it with the
% awidthshow operator taken from systemdict.
/awidthshow
{
    /awidthshow //systemdict exch get       % stack: cx cy char ax ay string operator
    /IG_PStoTxtDict //userdict exch get     % stack: ... string operator dict
    /IG_output_make get exec
} bind IG_redef
% proc string kshow -
% Shows <string> one character at a time through the redefined show (so
% every character is reported), and executes <proc> between each pair of
% adjacent characters. As the kshow operator requires, the character codes
% of the character just shown and of the following character are pushed on
% the operand stack before <proc> is executed.
% All state is kept on the operand stack: <proc> runs with the caller's
% dictionary stack, and nested kshow calls do not disturb each other.
% <proc> is expected to consume the two character codes and to leave the
% operands below them untouched.
/kshow
{                                       % stack: proc str
    dup length 1 sub                    % stack: proc str last
    0 1 3 -1 roll                       % stack: proc str 0 1 last
    {                                   % stack: proc str i
        % show the character at index i as a one-character substring
        2 copy 1 getinterval            % stack: proc str i str1
        //userdict /show get exec       % stack: proc str i
        % is there a following character?
        dup 1 add 2 index length lt     % stack: proc str i bool
        {
            2 copy get                  % stack: proc str i c0
            2 index 2 index 1 add get   % stack: proc str i c0 c1
            4 index exec                % stack: proc str i
        } if
        pop                             % stack: proc str
    } for
    pop pop                             % stack: -
} bind IG_redef
% string numarray xshow -
% Reports <string> through IG_output_make and paints it with the xshow
% operator taken from systemdict, so the displacements in <numarray> are
% applied and the reported end point is the real end of the text.
/xshow
{                                           % stack: string numarray
    % IG_output_make expects the string directly below the procedure;
    % the procedure restores the operand order before painting
    exch                                    % stack: numarray string
    { exch //systemdict /xshow get exec }   % stack: numarray string proc
    //userdict /IG_PStoTxtDict get /IG_output_make get exec
} bind IG_redef
% string numarray yshow -
% Reports <string> through IG_output_make and paints it with the yshow
% operator taken from systemdict, so the displacements in <numarray> are
% applied and the reported end point is the real end of the text.
/yshow
{                                           % stack: string numarray
    % IG_output_make expects the string directly below the procedure;
    % the procedure restores the operand order before painting
    exch                                    % stack: numarray string
    { exch //systemdict /yshow get exec }   % stack: numarray string proc
    //userdict /IG_PStoTxtDict get /IG_output_make get exec
} bind IG_redef
% string numarray xyshow -
% Reports <string> through IG_output_make and paints it with the xyshow
% operator taken from systemdict, so the displacements in <numarray> are
% applied and the reported end point is the real end of the text.
/xyshow
{                                           % stack: string numarray
    % IG_output_make expects the string directly below the procedure;
    % the procedure restores the operand order before painting
    exch                                    % stack: numarray string
    { exch //systemdict /xyshow get exec }  % stack: numarray string proc
    //userdict /IG_PStoTxtDict get /IG_output_make get exec
} bind IG_redef
end % userdict begin
% Interpreter-specific finalisation.
% NOTE(review): .bindoperators and .bind are not standard PostScript
% operators; presumably these lines complete a Ghostscript start-up with
% delayed binding (-dDELAYBIND) - confirm against the Ghostscript version
% in use.
% Run .bindoperators with systemdict on top of the dictionary stack.
systemdict begin .bindoperators end
% Define bind in the current dictionary as the value .bind resolves to.
/bind /.bind load def
% Make systemdict read-only; the dictionary object left on the operand
% stack by readonly is discarded.
systemdict readonly pop